#!/usr/bin/python
#coding=utf-8
import numpy as np
import pandas as pd
from openpyxl import load_workbook
from xlrd import open_workbook
import csv
from csv import reader
import re
import os
from matplotlib import pyplot
from matplotlib import style
style.use("ggplot")
#%matplotlib notebook
import sys
import operator
import datetime
import time
import matplotlib.pyplot as plt
def diffdates(d1, d2):
#Date format: %Y-%m-%d %H:%M:%S
return (time.mktime(time.strptime(d2,'%Y-%m-%d %H:%M:%S')) - time.mktime(time.strptime(d1,'%Y-%m-%d %H:%M:%S')))
filename1 = 'bigdata_correct.xlsx'
df = pd.read_excel(filename1, sheetname='Data')
df = df[df.IdentNo.str[-2:] == "_G"]
# Add two more columns
print ("start")
df = df.values
a,b = df.shape
z = np.zeros((a,2))
df = np.hstack((df,z))
a,b = df.shape
print ("end")
#listing all unique products
print ("start")
varia = 0
list1 = []
while (varia < a):
vari = str(df[varia][2]) + "_" + str(df[varia][3])
t = len(list1)
flag = 0
for i in range(t):
if list1[i] == vari:
flag = 1
break
if (flag == 0):
list1.append(vari)
varia += 1
unique_sets = len(list1)
print (list1)
print (unique_sets)
print ("end")
#assigning product number depending on TTNR to the data-set
print ("start")
for i in range(a):
concat= str(df[i][2]) + "_" + str(df[i][3])
for ii in range(unique_sets):
if (list1[ii] == concat):
df[i][4] = ii + 1
break
print ("end")
print ("start")
timers = 180
threshold_analysis = []
individual_variations = []
threshold_analysis.append([])
flag = 0
a,b = df.shape
count = 0
value_prev = "0"
date_prev = "0"
final_array = []
total_size = 0
for i in range(a):
total_size += 1
if flag == 0:
final_array.append([])
final_array[count].append(df[i][4])
final_array[count].append(df[i][1])
total_size = 1
flag = 1
elif df[i][4] != value_prev:
final_array[count].append(date_prev)
final_array[count].append(total_size - 1)
count += 1
final_array.append([])
final_array[count].append(df[i][4])
final_array[count].append(df[i][1])
total_size = 1
else:
t1 = pd.to_datetime(date_prev)
t2 = pd.to_datetime(df[i][1])
time = pd.Timedelta(t2 - t1).seconds + pd.Timedelta(t2 - t1).days*24*60*60
if (time > timers):
final_array[count].append(date_prev)
final_array[count].append(total_size - 1)
count = count + 1
final_array.append([])
final_array[count].append(df[i][4])
final_array[count].append(df[i][1])
total_size = 1
df[i][5] = count + 1
value_prev = df[i][4]
date_prev = df[i][1]
if (i == a-1):
final_array[count].append(date_prev)
final_array[count].append(total_size - 1)
print (final_array)
print (type(final_array))
np.savetxt("product_type_correct.csv", df, fmt="%s", delimiter=",")
np.savetxt("timelist_correct.csv", final_array, fmt="%s", delimiter=",")
length_list = len(final_array)
print (length_list)
buckets = [0] * (unique_sets+1)
for i in range(length_list):
a = final_array[i][0]
buckets[a] += 1
print (buckets)
###Code for generating file, alerady generated so using the generated one
filename2 = 'analyses_data_corrected.xlsx'
pro_data = pd.read_excel(filename2, sheetname='RealtimeData')
pro_data['TIME'] = pd.to_datetime(pro_data['TIME'])
pro_data = pro_data.values
a,b = pro_data.shape
z = np.zeros((a,2))
pro_data = np.hstack((pro_data,z))
a,b = pro_data.shape
i = 0
print ("start")
while (i < a):
t3 = pro_data[i][0]
flag = 0
j = 0
while (j < length_list):
t1 = pd.to_datetime(final_array[j][1])
t2 = pd.to_datetime(final_array[j][2])
time1 = pd.Timedelta(t2 - t3).seconds + pd.Timedelta(t2 - t3).days*24*60*60
time2 = pd.Timedelta(t3 - t1).seconds + pd.Timedelta(t3 - t1).days*24*60*60
time3 = pd.Timedelta(t2 - t1).seconds + pd.Timedelta(t2 - t1).days*24*60*60
if (time1 >= 0 and time2 >= 0 and time3 >= 0 and (time1+time2 == time3)):
pro_data[i][b-2] = final_array[j][0]
pro_data[i][b-1] = j
flag = 1
break
j += 1
if (flag == 0):
pro_data = np.delete(pro_data, i, 0)
i = i - 1
a,b = pro_data.shape
i += 1
df = pd.DataFrame(pro_data)
filepath = 'my_excel_file_corrected.xlsx'
df.to_excel(filepath, index=False)
########################NEWWWWWWWWWWWWWW########
filename4 = 'my_excel_file1_corrected.xlsx'
jj = []
counts = 0
print (len(final_array))
for i in range(len(final_array)):
print (i)
df1 = pd.read_excel(filename4, sheetname='Sheet1')
df1 = df1[["TIME","ACTIVEPOWER","PRODUCT_TYPE", "BATCH_TYPE", "REALTIME_E"]]
df1 = df1[df1.BATCH_TYPE == i]
df1 = df1.values
a,b = df1.shape
if (a == 0):
continue
counts += 1
p = df1[a-1][4]
q = df1[0][4]
energy = p - q
quantitites_value = final_array[i][3]
e_by_q = (energy*1.00)/quantitites_value
jj.append([])
jj[counts-1].append(final_array[i][0])
jj[counts-1].append(e_by_q)
######new end##########################
print (jj)
jjj = pd.DataFrame(jj)
print (jjj)
#jjj.to_excel('e_by_q.xlsx', index=False)
'''
filename3 = 'e_by_q.xlsx'
for i in range(unique_sets):
#colors = np.random.rand(i)
df1 = pd.read_excel(filename3, sheetname='Sheet1')
#df1 = df1[df1.PRODUCT_TYPE == i]
y_label = "Energy difference divided by quantity kWh"
plt.figure(1, figsize=(10,10))
plt.subplot(211)
#plt.plot(df1['PRODUCT_TYPE'], df1['E_by_Q'], 'coral', label='positive error')
plt.scatter(df1['PRODUCT_TYPE'], df1['E_by_Q'], alpha=0.5)
#plt.title("product type" + str(i))
plt.xlabel('product number')
plt.ylabel(y_label)
plt.legend()
plt.tight_layout()
plt.show()
break
'''
filename3 = 'e_by_q.xlsx'
for i in range(unique_sets):
#colors = np.random.rand(i)
df1 = pd.read_excel(filename3, sheetname='Sheet1')
#df1 = df1[df1.PRODUCT_TYPE == i]
y_label = "Energy difference divided by quantity kWh"
#plt.figure(1, figsize=(10,10))
#plt.subplot(211)
#plt.plot(df1['PRODUCT_TYPE'], df1['E_by_Q'], 'coral', label='positive error')
#plt.scatter(df1['PRODUCT_TYPE'], df1['E_by_Q'], alpha=0.5)
#plt.title("product type" + str(i))
#plt.xlabel('product number')
#plt.ylabel(y_label)
fig, ax = plt.subplots(figsize=(20, 20))
ax.scatter(df1['PRODUCT_TYPE'], df1['E_by_Q'], alpha=0.5)
ax.set_xlabel('product number')
ax.set_ylabel(y_label)
ax.set_xticks(np.arange(unique_sets + 1))
#for tick in ax.get_xticklabels():
# tick.set_visible(True)
plt.legend()
plt.tight_layout()
plt.show()
break
#filename3 = 'POWER_TTNR.xlsx'
filename3 = 'my_excel_file1_corrected.xlsx'
i = 1
############code addded on 22/6/2018#######
print (list1)
mean_standard = []
quantities_n = []
#########end############
#fig, axs = plt.subplots(unique_sets,1,figsize=(100,300))
#plot_count = 0
while (i <= unique_sets):
df1 = pd.read_excel(filename3, sheetname='Sheet1')
df1 = df1[df1.PRODUCT_TYPE == i]
df1 = df1[["TIME","ACTIVEPOWER","PRODUCT_TYPE", "BATCH_TYPE"]]
x = df1.BATCH_TYPE.unique()
################Code started for plotting graph ###################
'''
y_label = "Real Time Power of Product " + str(i)
plt.subplot(221)
plt.plot(df1['TIME'], df1['ACTIVEPOWER'])
plt.title(str(i))
plt.xlabel('time-stamp')
plt.ylabel(y_label)
plt.show()
'''
####################Code ended for plotting graph###########
#df1 = df1.values
#a,b = df1.shape
jj = []
for kk in range(len(x)):
jj.append([])
df2 = df1[df1.BATCH_TYPE == x[kk]]
df2 = df2.values
aa, bb = df2.shape
ref_time = 0
for ll in range(aa):
jj[kk].append(df2[ll][1])
jj = pd.DataFrame(jj)
median_var = jj.median()
standard_deviation_from_mean_value = jj.std()
max_value = jj.max()
min_value = jj.min()
mean_value = jj.mean()
kkkkkkk = pd.Series.tolist(median_var)
kkkkkkk1 = pd.Series.tolist(standard_deviation_from_mean_value)
kkkkkkk1_numpy = np.asarray(kkkkkkk1)
ppp = kkkkkkk1_numpy.size
countttt_batch_size = 0
for variables in range(ppp):
if np.isnan(kkkkkkk1_numpy[variables]):
countttt_batch_size += 1
mean_std_dev = np.nanmean(kkkkkkk1_numpy)
mean_standard.append(mean_std_dev)
quantities_n.append(ppp - countttt_batch_size)
print("standard deviation is " + str(mean_std_dev))
kkkkkkk2 = pd.Series.tolist(max_value)
kkkkkkk3 = pd.Series.tolist(min_value)
kkkkkkk4 = pd.Series.tolist(mean_value)
kkkkkkk5 = [b_i - a_i for a_i, b_i in zip(kkkkkkk1, kkkkkkk4)]
kkkkkkk6 = [b_i + a_i for a_i, b_i in zip(kkkkkkk1, kkkkkkk4)]
y_a = list(range(0, len(kkkkkkk)))
y_label = "Real Time Power of Product " + str(i) + " kW"
plt.figure(1, figsize=(20,10))
plt.subplot(211)
plt.plot(y_a, kkkkkkk, 'r', label='median/predicted')
#plt.plot(y_a, kkkkkkk1, 'k', label='standard deviation from mean value')
plt.plot(y_a, kkkkkkk5, 'brown', label='negative error')
plt.plot(y_a, kkkkkkk6, 'coral', label='positive error')
plt.title("product type" + str(i))
plt.xlabel('time-stamp in minutes')
plt.ylabel(y_label)
plt.legend()
plt.tight_layout()
plt.subplot(212)
plt.plot(y_a, kkkkkkk4, 'bx', label='mean power')
plt.plot(y_a, kkkkkkk2, 'y', label='maximum power')
plt.plot(y_a, kkkkkkk3, 'g', label='minimum power')
plt.title("product type" + str(i))
plt.xlabel('time-stamp in minutes')
plt.ylabel(y_label)
plt.legend()
plt.tight_layout()
plt.show()
##########second phase of code ending#########
i += 1
#plot_count += 1
#print (type(mean_standard))
print (mean_standard)
print (quantities_n)
##list1
###mean_standard
###quantities_n
print (mean_standard)
print (quantities_n)
print (list1)
list1_new = []
mean_standard_new = []
quantities_n_new = []
product_number = []
for i in range(len(mean_standard)):
if str(mean_standard[i]) != 'nan':
mean_standard_new.append(mean_standard[i])
quantities_n_new.append(quantities_n[i])
list1_new.append(list1[i])
product_number.append(i)
mean_standard = mean_standard_new
quantities_n = quantities_n_new
list1 = list1_new
print (mean_standard)
print (quantities_n)
print (list1)
print (product_number)
width = 0.5
y_pos = np.arange(len(list1))
fig, ax = plt.subplots(figsize=(80, 40))
rects = ax.bar(y_pos, mean_standard, width ,color='r', align='edge' ,alpha=0.5)
ax.set_xticks(y_pos + width/2.)
ax.set_xticklabels(list1, fontsize=10)
ax.set_xlabel('TTNR')
ax.set_ylabel('standard deviation')
plt.legend()
def autolabel(rects,ii):
for rect in rects:
height = rect.get_height()
values = quantities_n[ii]
value1 = product_number[ii]
value2 = mean_standard[ii]
ii += 1
ax.text(rect.get_x() + rect.get_width()/2., 1.08*height,
'%s & prod. %d & std %f' %(str(values) , value1, value2),
ha='center', va='bottom', fontsize=40)
autolabel(rects, 0)
plt.tight_layout()
plt.show()